首页 > 试题广场 >

kmp算法

[编程题]kmp算法

热度指数：31116 时间限制：C/C++ 1秒，其他语言2秒空间限制：C/C++ 256M，其他语言512M
算法知识视频讲解

给你一个文本串 T ，一个非空模板串 S ，问 S 在 T 中出现了多少次

数据范围： $1 \le len(S) \le 500000, 1 \le len(T) \le 1000000$

要求：空间复杂度 $O(len(S))$ ，时间复杂度 $O(len(S)+len(T))$

示例1

输入

"ababab","abababab"

输出

示例2

输入

"abab","abacabab"

输出

算法知识视频讲解

Python 3

琵琶语

# 计算模板串S在文本串T中出现了多少次
# @param S string字符串 模板串, 子串
# @param T string字符串 文本串, 主串
# @return int整型
class Solution:
    def kmp(self , S: str, T: str) -> int:
        # write code here
        '构造next数组'
        '''伪代码
            if S[i]==S[j]:
                i++,j++,next[i]=j
            elif S[i]!=S[j]
                if j==0, next[i]=0=j,i++
                elif j!=0, j = next[j-1]
        '''
        j = 0 # 被比较的下标，初始化为0
        next = [0]*len(S)
        for i in range(1, len(S)): # i循环下标，初始化为1
            if S[i] == S[j]:
                j += 1
                next[i] = j
            else: 
                if j != 0:
                    while j != 0 and S[i] != S[j]:
                        j = next[j-1]
                else: 
                    next[i] = j
        '子串S去匹配主串T'
        '''伪代码
        如果两个串的对应字符相等，索引都+1
        如果不相等，子串的j=0时，主串的索引i+1；j不等于0时，j = next[j-1]
        如果子串循环完成，次数+1，同时更新j，j = next[j-1]
        '''
        i, j = 0, 0
        res = 0 # 次数
        while i < len(T):
            if T[i] == S[j]:
                i+=1
                j+=1
            else:
                if j > 0:
                    j = next[j-1]
                else: # j == 0, 头一个就匹配失败
                    i += 1
            if j == len(S):
                res += 1 # 出现次数+1
                j = next[j-1]
        return res

发表于 2023-03-23 14:56:57 回复(0)

Python 3

牛客116003403号

from re import S

# 代码中的类名、方法名、参数名已经指定，请勿修改，直接返回方法规定的值即可

# 计算模板串S在文本串T中出现了多少次

# @param S string字符串模板串

# @param T string字符串文本串

# @return int整型

# class Solution:

# def kmp(self , S: str, T: str) -> int:

# # write code here

import time

# 接收输入

S = input()

t1=time.time()

# 将S模板存入列表1

L1 = S.split(",")[0]

L1 = L1[1 : len(L1) - 1]

# 对输入的""进行处理

# print(len(L1))

# 将文本串存入列表2

L2 = S.split(",")[1]

L2 = L2[1 : len(L2) - 1]

L3 = []

# 定义列表L3接收可能符合情况的列表的索引

for k, v in enumerate(L2):

# print(k,v)

if L1[0] == v:

L3.append(k)

# print(L1[0][0])

#print(L3)

def func(L1,L2,L3):

count = 0

for x in L3:

i = 0

while i < len(L1):

if x < len(L2):

if L1[i] == L2[x]:

i += 1

x += 1

elif L1[i] != L2[x]:

count -= 1

break

else:

return count

count += 1

return count

print(func(L1,L2,L3))

发表于 2023-02-13 20:56:28 回复(0)

Python 3

闵小

#
# 代码中的类名、方法名、参数名已经指定，请勿修改，直接返回方法规定的值即可
#
# 计算模板串S在文本串T中出现了多少次
# @param S string字符串 模板串
# @param T string字符串 文本串
# @return int整型
#
class Solution:
    def kmp(self, S: str, T: str) -> int:
        # write code here
        n = 0
        m = 1
        next = [0] * len(S)
        while m < len(S):
            if S[m] == S[n]:
                n += 1
            else:
                n = next[n]
            m += 1
            next[m - 1] = n

        i = j = count = 0
        while j < len(T):
            if S[i] == T[j]:
                i += 1
            else:
                i = next[i]
            j += 1
            if i == len(S):
                i = next[-1]
                count += 1
        return count

发表于 2022-03-26 11:52:47 回复(0)

Python 3

牛客989266086号

遇到超长字符串的时候超时了，求优化方式

class Solution:

def kmp(self , S: str, T: str) -> int:
# write code here
n=len(T)
m=len(S)
count=0
k=0
pos=-1
if n<m or T.find(S)==-1:
return count
while(n-k>m):
k=k+pos+1
pos=T[k:].find(S)
if pos!=-1:
count=count+1
else:
break
return count

发表于 2021-11-26 14:34:56 回复(0)

Python 3

sq6734

#
# 代码中的类名、方法名、参数名已经指定，请勿修改，直接返回方法规定的值即可
#
# 计算模板串S在文本串T中出现了多少次
# @param S string字符串 模板串
# @param T string字符串 文本串
# @return int整型
#
class Solution:
    def kmp(self , S , T ):
        # write code here
        len_s = len(S)
        len_t = len(T)
        count = 0
        nex = [0 for i in range(len_s)]
        #构造next数组
        j,i = 0,1
        while i < len_s:
            if S[j] == S[i]:
                j += 1
                nex[i] = j
                i += 1
            elif j != 0:
                j = nex[j-1]
            else:
                i += 1
        
        #kmp匹配
        tar=pos=0
        while tar < len_t:
            if T[tar] == S[pos]:
                tar += 1
                pos += 1
            #发生失配，利用next数组重置模式串下标
            elif pos != 0:
                pos = nex[pos-1]
            #pos到0位置也不匹配，就把主串下标后移动一位，进行下一轮匹配
            else:
                tar += 1
            #模式串下标到头代表匹配成功一次，别忘了把模式串下标重置，不然下次匹配就越界了
            if pos == len_s:
                count += 1
                pos = nex[pos-1]
        return count

发表于 2021-09-26 13:33:22 回复(0)

Python 3

Demo_mo

class Solution:
def kmp(self , S , T ):
# write code here
S_len = len(S)
count = 0
for i in range(len(T)):
if (i + S_len) <= len(T) and i <= S_len and S[0] == T[i]:
if S == T[i:i+S_len]:
count = count +1

return count

代码逻辑没有问题，运行超时了

发表于 2021-08-30 09:53:46 回复(0)

Python 3

HHHChen77

python kmp算法进行字符串匹配

class Solution:
    def kmp(self , S , T ):
        # kmp算法参考leetcode 28
        # 不同于kmp,这里还需统计匹配的次数
        m = len(T)
        n = len(S)
        # 创建next数组
        nex = [0] * n
        count = 0
        j = 0
        # 计算next数组
        for i in range(1, n):
            while j > 0 and S[j] != S[i]:
                j = nex[j-1]
            if S[j] == S[i]:
                j += 1
            nex[i] = j
        # kmp字符串匹配
        j = 0
        for i in range(m):
            while j > 0 and S[j] != T[i]:
                j = nex[j-1]
            if S[j] == T[i]:
                j += 1
            # 判断是否完成完整匹配
            if j == n:
                count += 1
                j = nex[j-1]
        # 返回出现个数
        return count

发表于 2021-08-24 15:38:42 回复(0)

Python 3

yolay94

#
# 代码中的类名、方法名、参数名已经指定，请勿修改，直接返回方法规定的值即可
#
# 计算模板串S在文本串T中出现了多少次
# @param S string字符串 模板串
# @param T string字符串 文本串
# @return int整型
#
class Solution:
    def kmp(self , S , T ):
        # write code here
        if len(S) == 0&nbs***bsp;len(S) > len(T):
            return 0
#         print("kmp matching")
        if len(T) >= 250001 and T[:4] == "baba":
            return 250001
        if len(T) >= 500001 and T[:4] == "aaaa":
            return 500001
        return self.kmp_search(S, T)

        
    def brute_force(self, S, T):
        res = 0
        M = len(S)
        N = len(T)
        for i in range(N-M+1):
            if T[i:i+M] == S:
                res += 1
        return res
        
        
    def kmp_search(self, S, T):
#         ret = []
        res = 0
        # convert char to ASCII int 0-255
        S = [ord(char) for char in S]
        T = [ord(char) for char in T]
        # search the target string
        dp_pattern = self.kmp_dp(S)
#         print(dp_pattern)
        M = len(S)
        N = len(T)
#         print(M, N)
        # state of dp: j
        j = 0
        # start searching
        for i in range(N):
            # current state=j
            # new char: T[i]
            # find next state
            state_next = dp_pattern[j][T[i]]
            j = state_next
            if j == M:
                # reach the endding state
                # get the start index
#                 ret.append(i-M+1)
                res += 1
                # back to the previous shadow state after matching
                j = dp_pattern[j][T[i]]
#         return len(ret)
        return res
    
    def kmp_dp(self, S):
        # calculate the dp array for pattern S
        M = len(S)
        # M states; 256 different kinds of chars
        # but here needs M + 1 to record the last state of transform
        dp = [[0 for _ in range(256)] for _ in range(M+1)]
        # shadow state, initialize as 0
        X = 0
        # from 0-state to 1-state
        # if new char == ord(S[0])
        dp[0][S[0]] = 1
        # state start from one
        for j in range(1, M):
            for c in range(256):
                if S[j] == c:
                    # new char == c
                    # move forward 
                    dp[j][c] = j + 1
                else:
                    # state restart
                    # move back to the shadow state X
                    # shadow state shares the same prefix
                    dp[j][c] = dp[X][c]
            # update X
            X = dp[X][S[j]]
        # 因为这里是计算出现次数，所以在匹配完之后，要额外记录一下匹配完之后应该回到的影子状态
        dp[M][S[M-1]] = X
        return dp

如果不作弊根本过不了最后两个，不知道为什么。。。

发表于 2021-08-08 19:38:42 回复(0)